1 Data loading

source("../rscripts/merge_load_set4_Yanni_singlepulse.R")
source("../rscripts/package.R")

Plotting with stimulation signal:

Yanni_sp2 <- merge(Yanni_sp, StimYanni_sp, by = c("Condition", "RealTime"), allow.cartesian = T)
Yanni_sp2[, Condition := factor(Yanni_sp2$Condition, levels = levels(Yanni_sp$Condition))]
Yanni_sp2$Pulse_intensity.norm <- ifelse(Yanni_sp2$Pulse_intensity.norm > 3, 1.3, 0.95)
ggplot(Yanni_sp2, aes(x = RealTime)) + geom_line(aes(y=Ratio.norm, group=Label)) + geom_line(aes(y = Pulse_intensity.norm), col = "blue") + facet_wrap("Condition", ncol = 4) + stat_summary(aes(y=Ratio.norm), fun.y = mean, geom = "line", color = "red", size = 1.5)  + theme(text = element_text(size = 25))

rm(Yanni_sp2)

Cut for times > 180min to have all conditions with same length.

Yanni_sp <- Yanni_sp[RealTime <= 180]
setkey(Yanni_sp, Condition, Label)

2 PCA with data centered on initial peak (40 to 75 min)

perform.PCA <- function(data, color = "Condition", na.fill = 1.2, value.var = "Ratio.norm", exp.var = c("Condition", "Label"), resp.var = "RealTime", plot.pca = F){
  require(ggbiplot)
  cast <- cast.and.fill(data, exp.var, resp.var, na.fill, value.var)
  pca <- prcomp(cast$mat2, center = T, scale = T)
  if(plot.pca) plot(pca)
  
  g <- ggbiplot(pca, obs.scale = 1, var.scale = 1, 
              groups = unlist(cast$mat[, color]), ellipse = TRUE, 
              circle = TRUE, choices = c(1,2))
  g <- g + scale_color_discrete(name = '')
  g <- g + theme(legend.direction = 'horizontal', 
               legend.position = 'top')
  return(g)
}

cast.and.fill <- function(data, exp.var = c("Condition", "Label"), resp.var = "RealTime", na.fill = 1.2, value.var = "Ratio.norm"){
  formula <- as.formula(paste0(paste(exp.var, collapse = " + "), " ~ ", resp.var))
  mat <- as.matrix(dcast(data, formula, value.var = value.var))
  mat2 <- mat[,-(1:length(exp.var))]
  class(mat2) <- "numeric"
  mat2[which(is.na(mat2))] <- na.fill
  return(list(mat = mat, mat2 = mat2))
}

2.1 With all conditions pooled

perform.PCA(Yanni_sp[RealTime >= 40 & RealTime <= 75], plot.pca = T) + ggtitle("All pooled") + theme(text = element_text(size = 25))

Elbow method shows that considering only first 2 PC is fine.

EGF is taken appart along 2nd PC which is highly correlated with the very first time points. This is because EGF amplitude reaches a much larger value quicker than NGF or FGF. The 1st PC corresponds more to the “after-peak period”. Interestingly though it explains 50% of the variance, it cannot take appart the different conditions.

To understand it a bit better, try to color with different variables like length of pulse and concentrations:

Yanni_sp_pca <- copy(Yanni_sp)
Yanni_sp_pca[, c("gf","pulse_length","concentration") := list(str_extract(Condition, "[ENF]"), str_extract(Condition, "[0-9]+$"), str_extract(Condition, "[0-9]+(\\.[0-9]+)?"))]

perform.PCA(data = Yanni_sp_pca[RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "pulse_length")  + ggtitle("All pooled - Color Pulse length") + theme(text = element_text(size = 25))

Lots of 10min on 2nd PC, but this isn’t very interesting since EGF and NGF are exclusively observed at 10min pulses.

perform.PCA(data = Yanni_sp_pca[RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "concentration")  + ggtitle("All pooled -  Color Concentration GF") + theme(text = element_text(size = 25))

1st PC is probably driven by “flat profiles” obtain for very low concentrations.

perform.PCA(data = Yanni_sp_pca[RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "gf")  + ggtitle("All pooled -  Color GF") + theme(text = element_text(size = 25))

Even with all conditions pooled together, we see that the 2nd PC very clearly take the GF appart.

2.2 With the 3 GF at 10min pulse

perform.PCA(Yanni_sp[Condition %in% levels(Condition)[1:12] & RealTime >= 40 & RealTime <= 75], plot.pca = T) + ggtitle("E,N,F 10min pulse") + theme(text = element_text(size = 25))

Again we consider only 2 first PC.

1st PC hardly takes appart anything but N0.25-P10, so 1st PC is indeed probably driven by flat profiles. 2nd PC provides an interesting separation for the different GF.

perform.PCA(data = Yanni_sp_pca[Condition %in% levels(Condition)[1:12] & RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "concentration")  + ggtitle("All pooled -  Color concentration") + theme(text = element_text(size = 25))

perform.PCA(data = Yanni_sp_pca[Condition %in% levels(Condition)[1:12] & RealTime >= 40 & RealTime <= 75], exp.var = c("Condition", "Label", "pulse_length", "concentration", "gf"), resp.var = "RealTime", color = "gf")  + ggtitle("All pooled -  Color GF") + theme(text = element_text(size = 25))

NGF displays largest variability in these conditions, what is the opposite observation to the one in the sustained exposition dataset. EGF on the contrary is extremely compact, showing a consistent response whatever concentration with 10min pulse.

2.3 With EGF only, 10min pulse

perform.PCA(Yanni_sp[Condition %in% levels(Condition)[1:4] & RealTime >= 40 & RealTime <= 75]) + ggtitle("EGF 10min pulse") + theme(text = element_text(size = 20))

Can’t really differentiate the concentrations

2.4 With NGF only, 10min pulse

perform.PCA(Yanni_sp[Condition %in% levels(Condition)[5:8] & RealTime >= 40 & RealTime <= 75]) + ggtitle("NGF 10min pulse") + theme(text = element_text(size = 20))

NGF is extremely separable, notably at 0.25 ng/mL where the reponse is completely flat. Explained variance is very large with this dataset.

2.5 With FGF only, 10min pulse

perform.PCA(Yanni_sp[Condition %in% levels(Condition)[9:12] & RealTime >= 40 & RealTime <= 75]) + ggtitle("FGF 10min pulse") + theme(text = element_text(size = 20))

3 Quantify overlap of clouds in PCA

3.1 Inverse Davis Bouldin Index

4 Separability measure along time - AUC

sep.meas.along.time <- function(data1, data2, time.col, measure.col){
  timev <- unique(data1[, get(time.col)])
  if(!(identical(unique(data2[, get(time.col)]), timev))) stop("Time vectors must be identical between the two data")
  out <- separability.measures(data1[get(time.col)==timev[1], get(measure.col)], data2[get(time.col)==timev[1], get(measure.col)])
  for(t in timev[2:length(timev)]){
    out <- rbind(out, separability.measures(data1[RealTime==t, get(measure.col)], data2[RealTime==t, get(measure.col)]))
  }
  out <- cbind(timev, out)
  return(out)
}

4.1 With the 3 GF at 10min pulse

library(plyr)
# Get all pairs of conditions
conditions_EGF <- combn(as.character(unique(Yanni_sp[,Condition])[1:4]), m = 2)
conditions_NGF <- combn(as.character(unique(Yanni_sp[,Condition])[5:8]), m = 2)
conditions_FGF <- combn(as.character(unique(Yanni_sp[,Condition])[9:12]), m = 2)

conditions <- cbind(conditions_EGF, conditions_NGF, conditions_FGF)
rm(conditions_EGF, conditions_FGF, conditions_NGF)

4.1.1 With raw data

# Compute separabilities of conditions at each time point
sep.meas.raw <- apply(conditions, 2, function(x) sep.meas.along.time(Yanni_sp[Condition==x[1]],  Yanni_sp[Condition==x[2]], "RealTime", "Ratio" ))
names(sep.meas.raw) <- apply(conditions, 2, function(x) paste(x[1], x[2], sep = ","))

# Go to data table
for(i in 1:length(sep.meas.raw)){
  temp <- unlist(strsplit(names(sep.meas.raw)[i], ","))
  sep.meas.raw[[i]]$Cond1 <- temp[1]
  sep.meas.raw[[i]]$Cond2 <- temp[2]
}
sep.meas.raw <- as.data.table(rbind.fill(sep.meas.raw))
sep.meas.raw[, c("Cond1", "Cond2") := list(factor(Cond1, levels = levels(Yanni_sp$Condition)), factor(Cond2, levels = levels(Yanni_sp$Condition)))]
ggplot(sep.meas.raw, aes(x= timev, y = jm)) + geom_line() + facet_wrap(~Cond1 + Cond2, ncol = 6) + theme(text=element_text(size=25)) + ylab("Jeffries-Matusita [0, sqrt(2)]") + geom_vline(xintercept=40, colour="blue", linetype="longdash") + ggtitle("Separability along time - Raw value")

max.val <- length(unique(sep.meas.raw$timev)) * sqrt(2)
auc.raw <- sep.meas.raw[, .(auc = sum(jm, na.rm = T)/max.val), by = c("Cond1", "Cond2")]
auc.raw
##         Cond1    Cond2        auc
##  1: E0.25-P10 E2.5-P10 0.02993389
##  2: E0.25-P10  E25-P10 0.02808064
##  3: E0.25-P10 E250-P10 0.03574629
##  4:  E2.5-P10  E25-P10 0.01382519
##  5:  E2.5-P10 E250-P10 0.01761181
##  6:   E25-P10 E250-P10 0.01815805
##  7: N0.25-P10 N2.5-P10 0.04049299
##  8: N0.25-P10  N25-P10 0.12810583
##  9: N0.25-P10 N250-P10 0.29967947
## 10:  N2.5-P10  N25-P10 0.09143206
## 11:  N2.5-P10 N250-P10 0.28423385
## 12:   N25-P10 N250-P10 0.12081241
## 13: F0.25-P10 F2.5-P10 0.03393054
## 14: F0.25-P10  F25-P10 0.05182419
## 15: F0.25-P10 F250-P10 0.02097738
## 16:  F2.5-P10  F25-P10 0.02600860
## 17:  F2.5-P10 F250-P10 0.01433825
## 18:   F25-P10 F250-P10 0.04107340
  • EGF is only separable at the time of the peak, long-term response similar.

  • NGF250 is the only condition which presents a sustained state on the long-run, and consequently presents long-term discrepancies with the other conditions which are adaptative. N0.25 can be used as a reference zero.

  • FGF conditions shows the weakness of the method, as it can’t tell any condition appart whereas there’s clearly a difference between those. This is because this measure only look at the range where the data are varying. FGF response is never frankly going off from “zero”. Normalization can provide a way to amplify these differences.

4.1.2 With normalized data

# Compute separabilities of conditions at each time point
sep.meas.norm <- apply(conditions, 2, function(x) sep.meas.along.time(Yanni_sp[Condition==x[1]],  Yanni_sp[Condition==x[2]], "RealTime", "Ratio.norm" ))
names(sep.meas.norm) <- apply(conditions, 2, function(x) paste(x[1], x[2], sep = ","))

# Go to data table
for(i in 1:length(sep.meas.norm)){
  temp <- unlist(strsplit(names(sep.meas.norm)[i], ","))
  sep.meas.norm[[i]]$Cond1 <- temp[1]
  sep.meas.norm[[i]]$Cond2 <- temp[2]
}
sep.meas.norm <- as.data.table(rbind.fill(sep.meas.norm))
sep.meas.norm[, c("Cond1", "Cond2") := list(factor(Cond1, levels = levels(Yanni_sp$Condition)), factor(Cond2, levels = levels(Yanni_sp$Condition)))]
ggplot(sep.meas.norm, aes(x= timev, y = jm)) + geom_line() + facet_wrap(~Cond1 + Cond2, ncol = 6) + theme(text=element_text(size=25)) + ylab("Jeffries-Matusita [0, sqrt(2)]") + geom_vline(xintercept=40, colour="blue", linetype="longdash") + ggtitle("Separability along time - Normalized value")

As expected small differences for EGF and NGF get quite largely amplified.

max.val <- length(unique(sep.meas.norm$timev)) * sqrt(2)
auc.norm <- sep.meas.norm[, .(auc = sum(jm, na.rm = T)/max.val), by = c("Cond1", "Cond2")]
auc.norm
##         Cond1    Cond2        auc
##  1: E0.25-P10 E2.5-P10 0.06198343
##  2: E0.25-P10  E25-P10 0.06200587
##  3: E0.25-P10 E250-P10 0.05866281
##  4:  E2.5-P10  E25-P10 0.04450344
##  5:  E2.5-P10 E250-P10 0.03400763
##  6:   E25-P10 E250-P10 0.02966335
##  7: N0.25-P10 N2.5-P10 0.06888664
##  8: N0.25-P10  N25-P10 0.16752396
##  9: N0.25-P10 N250-P10 0.37405542
## 10:  N2.5-P10  N25-P10 0.09823425
## 11:  N2.5-P10 N250-P10 0.29301445
## 12:   N25-P10 N250-P10 0.14262677
## 13: F0.25-P10 F2.5-P10 0.06849387
## 14: F0.25-P10  F25-P10 0.10344252
## 15: F0.25-P10 F250-P10 0.03593476
## 16:  F2.5-P10  F25-P10 0.03487343
## 17:  F2.5-P10 F250-P10 0.03500679
## 18:   F25-P10 F250-P10 0.07120502

4.2 Permutation test

Time series must be in matrix wide format.

one.permutation.auc <- function(x, y, metric){
  n <- nrow(x)
  m <- nrow(y)
  temp <- rbind(x, y)
  samp.traj <- sample(1:nrow(temp), size = n, replace = FALSE)
  x.resamp <- temp[samp.traj, ]
  y.resamp <- temp[setdiff(1:nrow(temp), samp.traj), ]

  seps <- sapply(1:ncol(x), function(j) separability.measures(x.resamp[, j], y.resamp[, j]))
  return(sum(unlist(seps[metric, ])))
}

permutation.auc <- function(x, y, n, metric = "jm"){
  # x,y: two matrices representing time series, row: trajectory; col: time
  # n: number of permutations
  # metric: one of "jm", "bh", "div", "tdiv", "ks"
  if(ncol(x) != ncol(y)) stop("x and y must have same number of columns")
  return(replicate(n, one.permutation.auc(x,y,metric)))
}
wrap_perm <- function(x, y, measure, n, na.fill){
  a <- as.matrix(dcast(x, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  b <- as.matrix(dcast(y, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  a[which(is.na(a))] <- na.fill
  b[which(is.na(b))] <- na.fill
  return(permutation.auc(a, b, n))
}
temp <- apply(conditions, 2, function(x) wrap_perm(Yanni_sp[Condition==x[1]], Yanni_sp[Condition==x[2]], "Ratio", 500, median(Yanni_sp$Ratio)))
temp <- temp/max.val
colnames(temp) <- apply(conditions, 2, paste, collapse=" ; ")
par(mfrow=c(3,6))
for(j in 1:ncol(temp)){
  hist(temp[,j], main = colnames(temp)[j], ylab="", xlab = "", cex.main = 3, cex.axis = 3)
}

4.3 Bootstrap per-column

one.bootstrap.auc.percol <- function(x, y, metric){
  samp.col <- sample(1:ncol(x), size = ncol(x), replace = TRUE)
  x.resamp <- x[, samp.col]
  y.resamp <- y[, samp.col]
  seps <- sapply(1:ncol(x), function(j) separability.measures(x.resamp[, j], y.resamp[, j]))
  return(sum(unlist(seps[metric, ])))
}

bootstrap.auc.percol <- function(x, y, B, metric = "jm"){
  # x,y: two matrices representing time series, row: trajectory; col: time
  # B: number of boostraps
  # metric: one of "jm", "bh", "div", "tdiv", "ks"
  if(ncol(x) != ncol(y)) stop("x and y must have same number of columns")
  return(replicate(B, one.bootstrap.auc.percol(x,y,metric)))
}
wrap_bootcol <- function(x, y, measure, n, na.fill){
  a <- as.matrix(dcast(x, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  b <- as.matrix(dcast(y, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  a[which(is.na(a))] <- na.fill
  b[which(is.na(b))] <- na.fill
  return(bootstrap.auc.percol(a, b, n))
}
bootcol <- apply(conditions, 2, function(x) wrap_bootcol(Yanni_sp[Condition==x[1]], Yanni_sp[Condition==x[2]], "Ratio", 500, median(Yanni_sp$Ratio)))
bootcol <- bootcol/max.val
colnames(bootcol) <- apply(conditions, 2, paste, collapse=";")
par(mfrow=c(3,6))
for(j in 1:ncol(bootcol)){
  hist(bootcol[,j], main = colnames(bootcol)[j], ylab="", xlab = "", cex.main = 3, cex.axis = 3)
}

4.4 Bootstrap per-row

one.bootstrap.auc.perrow <- function(x, y, metric){
  samp.rowx <- sample(1:nrow(x), size = nrow(x), replace = TRUE)
  samp.rowy <- sample(1:nrow(y), size = nrow(y), replace = TRUE)
  x.resamp <- x[samp.rowx, ]
  y.resamp <- y[samp.rowy, ]
  seps <- sapply(1:ncol(x), function(j) separability.measures(x.resamp[, j], y.resamp[, j]))
  return(sum(unlist(seps[metric, ])))
}

bootstrap.auc.perrow <- function(x, y, B, metric = "jm"){
  # x,y: two matrices representing time series, row: trajectory; col: time
  # B: number of boostraps
  # metric: one of "jm", "bh", "div", "tdiv", "ks"
  if(ncol(x) != ncol(y)) stop("x and y must have same number of columns")
  return(replicate(B, one.bootstrap.auc.perrow(x,y,metric)))
}
wrap_bootrow <- function(x, y, measure, n, na.fill){
  a <- as.matrix(dcast(x, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  b <- as.matrix(dcast(y, Condition + Label ~ RealTime, value.var = measure)[,-c(1,2)])
  a[which(is.na(a))] <- na.fill
  b[which(is.na(b))] <- na.fill
  return(bootstrap.auc.perrow(a, b, n))
}
bootrow <- apply(conditions, 2, function(x) wrap_bootrow(Yanni_sp[Condition==x[1]], Yanni_sp[Condition==x[2]], "Ratio", 500, median(Yanni_sp$Ratio)))
bootrow <- bootrow/max.val
colnames(bootrow) <- apply(conditions, 2, paste, collapse=" ; ")
par(mfrow=c(3,6))
for(j in 1:ncol(bootrow)){
  hist(bootrow[,j], main = colnames(bootrow)[j], ylab="", xlab = "", cex.main = 3, cex.axis = 3)
}

5 Peak features

source("../rscripts/single_peak_features.R")

Focus on 20min after pulse for this.

Yanni_cp <- copy(Yanni_sp)
Yanni_cp <- Yanni_cp[RealTime >= 36 & RealTime <= 60]
Yanni_cp[, c("mini", "maxi", "diff.min.max", "time.min", "time.max", "max.amp", "FWHM", "left", "right", "grow.half", "grow.lag", "dec.half", "dec.exp") := lapply(FeatAllFeat(y = Ratio.norm, basal = min(Ratio.norm), start.lag.grow = 36, end.exp.dec = 60), as.numeric), by = c("Condition", "Label")]

Extract only the first row of each trajectory (avoid redundancy):

Yanni_cp[,uniqueID := paste0(Condition, Label)]
Yanni_cp <- Yanni_cp[!duplicated(Yanni_cp$uniqueID),]

Plotting mean feature with radar plots:

library(fmsb)
colstoavg <- names(Yanni_cp)[c(6:8, 10, 12, 15, 17)]
meanvec <- Yanni_cp[, lapply(.SD,mean,na.rm=TRUE), by="Condition", .SDcols=colstoavg]
# Add 2 dummy lines atop to indicate minimum and maximum of axis
maxvec <- matrix(apply(meanvec, 2, max, na.rm=T), nrow = 1, dimnames = list(c(), colnames(meanvec)))[,-1, drop=F]
minvec <- matrix(apply(meanvec, 2, min, na.rm=T), nrow = 1, dimnames = list(c(), colnames(meanvec)))[,-1, drop=F]
temp <- rbind(maxvec, minvec, meanvec[,-1])
temp <- as.data.table(apply(temp, 2, as.numeric))
par(mfrow=c(1,3))
radarchart(temp[1:6, c( "diff.min.max", "mini","maxi", "grow.half", "dec.half", "FWHM", "time.max")], axistype = 2, vlcex = 2, palcex = 1.75)
legend(x="top", legend=levels(meanvec$Condition)[1:4], seg.len=0.5, pch=1, 
    bty="n" ,lwd=3, y.intersp=0.5, horiz=T, col=1:8, cex = 2)

radarchart(temp[c(1,2,7:10), c( "diff.min.max", "mini","maxi", "grow.half", "dec.half", "FWHM", "time.max")], axistype = 2, vlcex = 2, palcex = 1.75)
legend(x="top", legend=levels(meanvec$Condition)[5:8], seg.len=0.5, pch=1, 
    bty="n" ,lwd=3, y.intersp=0.5, horiz=T, col=1:8, cex = 2)

radarchart(temp[c(1,2,11:14), c( "diff.min.max", "mini","maxi", "grow.half", "dec.half", "FWHM", "time.max")], axistype = 2, vlcex = 2, palcex = 1.75)
legend(x="top", legend=levels(meanvec$Condition)[9:12], seg.len=0.5, pch=1, 
    bty="n" ,lwd=3, y.intersp=0.5, horiz=T, col=1:8, cex = 2)

Same plots with less annotations: